# Data pre-processing
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import openpyxl
import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
#Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as pl
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
#K-Nearest Neighbor Classification
import sklearn
from sklearn import neighbors
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
with open('football.txt', 'w', encoding='utf-8', errors='ignore') as file:
pd.read_excel('2021-06-23_Dataset_Fifa_19.xlsx').to_string(file, index=False)
#Reading a text file and storing it into a dataframe.
football= pd.read_csv('football.txt')
football.head()
| Unnamed: 0 | ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | ... | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | Release Clause Unnamed: 1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 158023 | L. Messi | 31 | https://cdn.sofifa.org/players/4/19/158023.png | Argentina | https://cdn.sofifa.org/flags/52.png | 94 | 94 | FC Barcelona | ... | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 | €226.5M ... |
| 1 | 1 | 20801 | Cristiano Ronaldo | 33 | https://cdn.sofifa.org/players/4/19/20801.png | Portugal | https://cdn.sofifa.org/flags/38.png | 94 | 94 | Juventus | ... | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 | €127.1M ... |
| 2 | 2 | 190871 | Neymar Jr | 26 | https://cdn.sofifa.org/players/4/19/190871.png | Brazil | https://cdn.sofifa.org/flags/54.png | 92 | 93 | Paris Saint-Germain | ... | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 | €228.1M ... |
| 3 | 3 | 193080 | De Gea | 27 | https://cdn.sofifa.org/players/4/19/193080.png | Spain | https://cdn.sofifa.org/flags/45.png | 91 | 93 | Manchester United | ... | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 | €138.6M ... |
| 4 | 4 | 192985 | K. De Bruyne | 27 | https://cdn.sofifa.org/players/4/19/192985.png | Belgium | https://cdn.sofifa.org/flags/7.png | 91 | 92 | Manchester City | ... | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 | €196.4M ... |
5 rows × 89 columns
Dataframe Dimensions
football.shape
(18207, 89)
football.columns
Index(['Unnamed: 0', 'ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag',
'Overall', 'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special',
'Preferred Foot', 'International Reputation', 'Weak Foot',
'Skill Moves', 'Work Rate', 'Body Type', 'Real Face', 'Position',
'Jersey Number', 'Joined', 'Loaned From', 'Contract Valid Until',
'Height', 'Weight', 'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM', 'RM', 'LWB', 'LDM',
'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB', 'RCB', 'RB', 'Crossing',
'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
'Marking', 'StandingTackle', 'SlidingTackle', 'GKDiving', 'GKHandling',
'GKKicking', 'GKPositioning', 'GKReflexes',
'Release Clause Unnamed: 1'],
dtype='object')
checking for missing values
football.isnull().values.sum()
75421
As we see column named 'Unnamed: 0' is index number, we can remove it
football=football.drop(columns=['Unnamed: 0'])
football.head
<bound method NDFrame.head of ID Name Age \
0 158023 L. Messi 31
1 20801 Cristiano Ronaldo 33
2 190871 Neymar Jr 26
3 193080 De Gea 27
4 192985 K. De Bruyne 27
... ... ... ...
18202 238813 J. Lundstram 19
18203 243165 N. Christoffersson 19
18204 241638 B. Worman 16
18205 246268 D. Walker-Rice 17
18206 246269 G. Nugent 16
Photo Nationality \
0 https://cdn.sofifa.org/players/4/19/158023.png Argentina
1 https://cdn.sofifa.org/players/4/19/20801.png Portugal
2 https://cdn.sofifa.org/players/4/19/190871.png Brazil
3 https://cdn.sofifa.org/players/4/19/193080.png Spain
4 https://cdn.sofifa.org/players/4/19/192985.png Belgium
... ... ...
18202 https://cdn.sofifa.org/players/4/19/238813.png England
18203 https://cdn.sofifa.org/players/4/19/243165.png Sweden
18204 https://cdn.sofifa.org/players/4/19/241638.png England
18205 https://cdn.sofifa.org/players/4/19/246268.png England
18206 https://cdn.sofifa.org/players/4/19/246269.png England
Flag Overall Potential \
0 https://cdn.sofifa.org/flags/52.png 94 94
1 https://cdn.sofifa.org/flags/38.png 94 94
2 https://cdn.sofifa.org/flags/54.png 92 93
3 https://cdn.sofifa.org/flags/45.png 91 93
4 https://cdn.sofifa.org/flags/7.png 91 92
... ... ... ...
18202 https://cdn.sofifa.org/flags/14.png 47 65
18203 https://cdn.sofifa.org/flags/46.png 47 63
18204 https://cdn.sofifa.org/flags/14.png 47 67
18205 https://cdn.sofifa.org/flags/14.png 47 66
18206 https://cdn.sofifa.org/flags/14.png 46 66
Club Club Logo \
0 FC Barcelona https://cdn.sofifa.org/teams/2/light/241.png
1 Juventus https://cdn.sofifa.org/teams/2/light/45.png
2 Paris Saint-Germain https://cdn.sofifa.org/teams/2/light/73.png
3 Manchester United https://cdn.sofifa.org/teams/2/light/11.png
4 Manchester City https://cdn.sofifa.org/teams/2/light/10.png
... ... ...
18202 Crewe Alexandra https://cdn.sofifa.org/teams/2/light/121.png
18203 Trelleborgs FF https://cdn.sofifa.org/teams/2/light/703.png
18204 Cambridge United https://cdn.sofifa.org/teams/2/light/1944.png
18205 Tranmere Rovers https://cdn.sofifa.org/teams/2/light/15048.png
18206 Tranmere Rovers https://cdn.sofifa.org/teams/2/light/15048.png
... Composure Marking StandingTackle SlidingTackle GKDiving \
0 ... 96.0 33.0 28.0 26.0 6.0
1 ... 95.0 28.0 31.0 23.0 7.0
2 ... 94.0 27.0 24.0 33.0 9.0
3 ... 68.0 15.0 21.0 13.0 90.0
4 ... 88.0 68.0 58.0 51.0 15.0
... ... ... ... ... ... ...
18202 ... 45.0 40.0 48.0 47.0 10.0
18203 ... 42.0 22.0 15.0 19.0 10.0
18204 ... 41.0 32.0 13.0 11.0 6.0
18205 ... 46.0 20.0 25.0 27.0 14.0
18206 ... 43.0 40.0 43.0 50.0 10.0
GKHandling GKKicking GKPositioning GKReflexes \
0 11.0 15.0 14.0 8.0
1 11.0 15.0 14.0 11.0
2 9.0 15.0 15.0 11.0
3 85.0 87.0 88.0 94.0
4 13.0 5.0 10.0 13.0
... ... ... ... ...
18202 13.0 7.0 8.0 9.0
18203 9.0 9.0 5.0 12.0
18204 5.0 10.0 6.0 13.0
18205 6.0 14.0 8.0 9.0
18206 15.0 9.0 12.0 9.0
Release Clause Unnamed: 1
0 €226.5M ...
1 €127.1M ...
2 €228.1M ...
3 €138.6M ...
4 €196.4M ...
... ...
18202 €143K ...
18203 €113K ...
18204 €165K ...
18205 €143K ...
18206 €165K ...
[18207 rows x 88 columns]>
Corelation between properties we have
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'
plt.figure(figsize = (30,30))
sns.heatmap(football.corr(), annot = True, fmt = '.1f')
plt.title("Corelation between the properties of attributes of football players")
plt.show()
based on correlation above we can say that goal keeper is having more variations when, compared to other players(non-goalkeepers)
top 5 countries in FIFA-19
per_nation = football["Nationality"].value_counts()
per_nation = per_nation.reset_index()
per_nation.columns= ["Nations","Counts"]
per_nation.head()
| Nations | Counts | |
|---|---|---|
| 0 | England | 1662 |
| 1 | Germany | 1198 |
| 2 | Spain | 1072 |
| 3 | Argentina | 937 |
| 4 | France | 914 |
fig = px.bar(per_nation, y='Counts', x='Nations', text='Nations')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
Selecting only the relevant features required to analyze the data according to the Usecases.
Football = football[['ID','Name','Age','Overall','Club','Value','Preferred Foot','Weak Foot','Position','Height','Weight','Crossing','Finishing','HeadingAccuracy','ShortPassing','Volleys','Dribbling','Curve','FKAccuracy','LongPassing',
'BallControl','Acceleration','SprintSpeed','Agility','Reactions','Balance','ShotPower','Jumping','Stamina','Strength','LongShots',
'Aggression','Interceptions','Positioning','Vision','Penalties','Composure','Marking','StandingTackle','SlidingTackle','GKDiving','GKHandling','GKKicking','GKPositioning','GKReflexes']]
Football.head()
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | 94 | FC Barcelona | €110.5M | Left | 4.0 | RF | 5'7 | ... | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6'2 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | 190871 | Neymar Jr | 26 | 92 | Paris Saint-Germain | €118.5M | Right | 5.0 | LW | 5'9 | ... | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 3 | 193080 | De Gea | 27 | 91 | Manchester United | €72M | Right | 3.0 | GK | 6'4 | ... | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 |
| 4 | 192985 | K. De Bruyne | 27 | 91 | Manchester City | €102M | Right | 5.0 | RCM | 5'11 | ... | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
5 rows × 45 columns
Football.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 18207 entries, 0 to 18206 Data columns (total 45 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 18207 non-null int64 1 Name 18207 non-null object 2 Age 18207 non-null int64 3 Overall 18207 non-null int64 4 Club 17966 non-null object 5 Value 18207 non-null object 6 Preferred Foot 18159 non-null object 7 Weak Foot 18159 non-null float64 8 Position 18147 non-null object 9 Height 18159 non-null object 10 Weight 18159 non-null object 11 Crossing 18159 non-null float64 12 Finishing 18159 non-null float64 13 HeadingAccuracy 18159 non-null float64 14 ShortPassing 18159 non-null float64 15 Volleys 18159 non-null float64 16 Dribbling 18159 non-null float64 17 Curve 18159 non-null float64 18 FKAccuracy 18159 non-null float64 19 LongPassing 18159 non-null float64 20 BallControl 18159 non-null float64 21 Acceleration 18159 non-null float64 22 SprintSpeed 18159 non-null float64 23 Agility 18159 non-null float64 24 Reactions 18159 non-null float64 25 Balance 18159 non-null float64 26 ShotPower 18159 non-null float64 27 Jumping 18159 non-null float64 28 Stamina 18159 non-null float64 29 Strength 18159 non-null float64 30 LongShots 18159 non-null float64 31 Aggression 18159 non-null float64 32 Interceptions 18159 non-null float64 33 Positioning 18159 non-null float64 34 Vision 18159 non-null float64 35 Penalties 18159 non-null float64 36 Composure 18159 non-null float64 37 Marking 18159 non-null float64 38 StandingTackle 18159 non-null float64 39 SlidingTackle 18159 non-null float64 40 GKDiving 18159 non-null float64 41 GKHandling 18159 non-null float64 42 GKKicking 18159 non-null float64 43 GKPositioning 18159 non-null float64 44 GKReflexes 18159 non-null float64 dtypes: float64(35), int64(3), object(7) memory usage: 6.3+ MB
Data Preprocessing - There are many null values in more than 20 columns and we chose to remove those records which are null because those columns cannot be filled by any techniques like mean,mode and median as the players skills score differes accordingly.
Removing the players who does not belongs to any clubs.
#Removing the rows which are null for more than 20 columns.
Football1 = Football[Football.isnull().sum(axis=1) < 20]
#Removing the players who does not have any clubs.
Footbal= Football1.dropna(axis=0, subset=['Club'])
Footbal.head()
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | 94 | FC Barcelona | €110.5M | Left | 4.0 | RF | 5'7 | ... | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6'2 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | 190871 | Neymar Jr | 26 | 92 | Paris Saint-Germain | €118.5M | Right | 5.0 | LW | 5'9 | ... | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 3 | 193080 | De Gea | 27 | 91 | Manchester United | €72M | Right | 3.0 | GK | 6'4 | ... | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 |
| 4 | 192985 | K. De Bruyne | 27 | 91 | Manchester City | €102M | Right | 5.0 | RCM | 5'11 | ... | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
5 rows × 45 columns
To solve the Use Case 1 we chose to sort the players according to the Defense Position and Defense Skill Scores. Then to sum up the clubs whi has high number of points in defense.
Clubs = Footbal[['Name','Overall','Club','Position']]
DefensiveSkills = Footbal['HeadingAccuracy'] + Footbal['ShortPassing'] + Footbal['LongPassing'] + Footbal['Volleys'] + Footbal['BallControl']
+ Footbal['Marking'] + Footbal['StandingTackle'] + Footbal['SlidingTackle']
Clubs['DefensiveSkills'] = DefensiveSkills
Similarly, adding all skill scores related to physical and mental in order to get the strongest clubs who has a good physical ability to play at defensive positions.
PhysicalnMental = Footbal['Acceleration'] + Footbal['SprintSpeed'] + Footbal['Agility'] + Footbal['Reactions'] + Footbal['Balance'] + Footbal['ShotPower'] + Footbal['Jumping'] + Footbal['Stamina'] + Footbal['Strength'] + Footbal['LongShots'] + Footbal['Aggression'] + Footbal['Interceptions'] + Footbal['Positioning'] + Footbal['Vision'] + Footbal['Penalties'] + Footbal['Composure']
Clubs['FitnessScore'] = PhysicalnMental
Adding Goal Keeper skill scores to a new array and assigning it to a dataframe column as these are required for the analysis of a good defense because goal keepers play decent role in defenfing scoring goals by opponents
GoalKeeper = Footbal['GKDiving'] + Footbal['GKHandling'] + Footbal['GKKicking'] + Footbal['GKPositioning'] + Footbal['GKReflexes']
Clubs['GoalKeeperSkills'] = GoalKeeper
Clubs
| Name | Overall | Club | Position | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|
| 0 | L. Messi | 94 | FC Barcelona | RF | 429.0 | 1265.0 | 54.0 |
| 1 | Cristiano Ronaldo | 94 | Juventus | ST | 428.0 | 1332.0 | 58.0 |
| 2 | Neymar Jr | 92 | Paris Saint-Germain | LW | 403.0 | 1254.0 | 59.0 |
| 3 | De Gea | 91 | Manchester United | GK | 177.0 | 781.0 | 444.0 |
| 4 | K. De Bruyne | 91 | Manchester City | RCM | 411.0 | 1296.0 | 56.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 18202 | J. Lundstram | 47 | Crewe Alexandra | CM | 202.0 | 790.0 | 47.0 |
| 18203 | N. Christoffersson | 47 | Trelleborgs FF | ST | 196.0 | 677.0 | 45.0 |
| 18204 | B. Worman | 47 | Cambridge United | ST | 194.0 | 765.0 | 40.0 |
| 18205 | D. Walker-Rice | 47 | Tranmere Rovers | RW | 205.0 | 735.0 | 51.0 |
| 18206 | G. Nugent | 46 | Tranmere Rovers | CM | 219.0 | 782.0 | 55.0 |
17918 rows × 7 columns
Selecting the players who plays at defensive positions and storing it to new dataframe as we mainly concentate on the players who play at these positions.
Club1 = Clubs[Clubs['Position'].isin(['GK','SW','FB','LWB','RWB','WB','LDM','RDM','CDM','LB','LCB','CB','RB','RCB','DM'])]
Club1
| Name | Overall | Club | Position | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|
| 3 | De Gea | 91 | Manchester United | GK | 177.0 | 781.0 | 444.0 |
| 8 | Sergio Ramos | 91 | Real Madrid | RCB | 396.0 | 1236.0 | 46.0 |
| 9 | J. Oblak | 90 | Atlético Madrid | GK | 99.0 | 749.0 | 433.0 |
| 12 | D. Godín | 90 | Atlético Madrid | CB | 364.0 | 1097.0 | 49.0 |
| 14 | N. Kanté | 89 | Chelsea | LDM | 357.0 | 1287.0 | 54.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 18192 | S. Squire | 47 | Cambridge United | CDM | 211.0 | 789.0 | 55.0 |
| 18193 | N. Fuentes | 47 | Unión Española | RB | 146.0 | 732.0 | 45.0 |
| 18194 | J. Milli | 47 | Lecce | GK | 78.0 | 358.0 | 240.0 |
| 18198 | J. Livesey | 47 | Burton Albion | GK | 73.0 | 413.0 | 246.0 |
| 18201 | D. Walsh | 47 | Waterford FC | RB | 156.0 | 675.0 | 52.0 |
9194 rows × 7 columns
c1 = Club1.sort_values(by='Overall', ascending=False).head(10)
Plotting a line graph of the top 10 players who has good overall score.
fig = go.Figure(data=[
go.Scatter(name='Overall Rating', x=c1['Name'], y=c1['Overall'],hovertext=c1['Club']),
])
fig.show()
c2=Club1.sort_values(by='DefensiveSkills', ascending=False).head(10)
Plotting a line graph of the top 10 players who has good Defensive skills score.
fig = go.Figure(data=[
go.Scatter(name='Defensive Skills Rating', x=c2['Name'], y=c2['DefensiveSkills'],hovertext=c2['Club'])
])
fig.show()
c3=Club1.sort_values(by='GoalKeeperSkills', ascending=False).head(10)
Plotting a line graph of the top 10 players who has good Goalkeeping skills score.
fig = go.Figure(data=[
go.Scatter(name='Goal Keeper Skills Rating', x=c3['Name'], y=c3['GoalKeeperSkills'],hovertext=c3['Club'])
])
fig.show()
Grouping by clubs to know the total scores of overall score, defensive skills score,fitness scores and goalkeeping skills score
Club2 = Club1.groupby(['Club'])['Overall','DefensiveSkills','FitnessScore','GoalKeeperSkills'].sum().reset_index()
Club2
| Club | Overall | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|
| 0 | SSV Jahn Regensburg | 1115 | 4051.0 | 14315.0 | 1637.0 |
| 1 | 1. FC Heidenheim 1846 | 984 | 3401.0 | 12803.0 | 1606.0 |
| 2 | 1. FC Kaiserslautern | 1065 | 3969.0 | 14405.0 | 1539.0 |
| 3 | 1. FC Köln | 1049 | 3604.0 | 13041.0 | 1877.0 |
| 4 | 1. FC Magdeburg | 868 | 2928.0 | 10801.0 | 1464.0 |
| ... | ... | ... | ... | ... | ... |
| 655 | Zagłębie Sosnowiec | 904 | 3299.0 | 12326.0 | 1543.0 |
| 656 | Çaykur Rizespor | 1385 | 5388.0 | 18906.0 | 2170.0 |
| 657 | Örebro SK | 784 | 2589.0 | 10121.0 | 1353.0 |
| 658 | Östersunds FK | 564 | 1920.0 | 7447.0 | 1183.0 |
| 659 | Śląsk Wrocław | 944 | 3614.0 | 13104.0 | 1515.0 |
660 rows × 5 columns
Summing up defensive skill scores and goalkeeping skills score to get the total defensive points that clubs have.
Club2['OverallDefenseScore'] = Club2['DefensiveSkills'] + Club2['GoalKeeperSkills']
#Sorting the top 10 clubs that have good overall defense scores to analyze the clubs which prevents opponents scoring a goal.
Club3 = Club2.sort_values('OverallDefenseScore', ascending=False).head(10)
Club3
| Club | Overall | DefensiveSkills | FitnessScore | GoalKeeperSkills | OverallDefenseScore | |
|---|---|---|---|---|---|---|
| 367 | Leicester City | 1473 | 5670.0 | 19256.0 | 2083.0 | 7753.0 |
| 383 | Manchester City | 1432 | 5533.0 | 18805.0 | 2157.0 | 7690.0 |
| 206 | Everton | 1435 | 5592.0 | 18782.0 | 2044.0 | 7636.0 |
| 593 | Toulouse Football Club | 1388 | 5343.0 | 18355.0 | 2254.0 | 7597.0 |
| 52 | Arsenal | 1495 | 5566.0 | 19362.0 | 1997.0 | 7563.0 |
| 656 | Çaykur Rizespor | 1385 | 5388.0 | 18906.0 | 2170.0 | 7558.0 |
| 428 | Olympique de Marseille | 1396 | 5158.0 | 18083.0 | 2098.0 | 7256.0 |
| 212 | FC Barcelona | 1373 | 5039.0 | 17278.0 | 2215.0 | 7254.0 |
| 17 | AS Monaco | 1376 | 4975.0 | 17637.0 | 2224.0 | 7199.0 |
| 479 | Real Madrid | 1401 | 4905.0 | 17486.0 | 2282.0 | 7187.0 |
Create figure - Bar plot of top 10 clubs who has good Defensive skills, fitness score and goal keeping scores,From the below plot we can conclude that the Leicester City Football club could conceed less goals.As the club leicester city has pretty good overall, goalkeeping, fitness and defense scores.
fig = go.Figure(data=[
go.Bar(name='Overall Skills Rating', x=Club3['Club'], y=Club3['Overall'],hovertext=Club3['FitnessScore']),
go.Bar(name='Defensive Score Skills Rating', x=Club3['Club'], y=Club3['DefensiveSkills'],hovertext=Club3['FitnessScore']),
go.Bar(name='Goal Keeper Skills Rating', x=Club3['Club'], y=Club3['GoalKeeperSkills'],hovertext=Club3['FitnessScore'])
])
# Set templates
fig.update_layout(template="ggplot2")
fig.show()
Club11 = Club1.loc[Club1['Club'] == 'Manchester City']
Club11
| Name | Overall | Club | Position | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|
| 57 | Ederson | 86 | Manchester City | GK | 183.0 | 782.0 | 425.0 |
| 75 | Fernandinho | 86 | Manchester City | CDM | 385.0 | 1223.0 | 48.0 |
| 89 | N. Otamendi | 85 | Manchester City | CB | 359.0 | 1091.0 | 48.0 |
| 107 | V. Kompany | 85 | Manchester City | CB | 339.0 | 1064.0 | 38.0 |
| 113 | A. Laporte | 84 | Manchester City | LCB | 352.0 | 1042.0 | 45.0 |
| 135 | K. Walker | 84 | Manchester City | RB | 354.0 | 1236.0 | 57.0 |
| 182 | J. Stones | 83 | Manchester City | RCB | 345.0 | 1054.0 | 51.0 |
| 352 | B. Mendy | 81 | Manchester City | LB | 324.0 | 1194.0 | 47.0 |
| 514 | F. Delph | 80 | Manchester City | LB | 359.0 | 1227.0 | 50.0 |
| 645 | Danilo | 79 | Manchester City | RB | 366.0 | 1185.0 | 53.0 |
| 871 | C. Bravo | 78 | Manchester City | GK | 180.0 | 775.0 | 393.0 |
| 1304 | E. Mangala | 76 | Manchester City | CB | 285.0 | 1014.0 | 53.0 |
| 4652 | O. Zinchenko | 71 | Manchester City | LB | 348.0 | 1109.0 | 61.0 |
| 6316 | P. Sandler | 69 | Manchester City | CB | 303.0 | 974.0 | 64.0 |
| 8966 | C. Gomes | 66 | Manchester City | CDM | 293.0 | 1007.0 | 46.0 |
| 13211 | Eric García | 62 | Manchester City | CB | 262.0 | 835.0 | 50.0 |
| 13814 | C. Humphreys-Grant | 62 | Manchester City | CB | 242.0 | 887.0 | 55.0 |
| 14511 | A. Muric | 61 | Manchester City | GK | 117.0 | 575.0 | 303.0 |
| 17451 | D. Grimshaw | 54 | Manchester City | GK | 137.0 | 531.0 | 270.0 |
Plotting line graph of Manchester city club players overall and defensive skill scores to analyze how good the players are in those areas. From the plot we can say that Manchester City has almost 50% of the players who has good defensive skills and overall scores for example Fernandhino has over all score as 86 and defensive skills score as 385.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club11['Overall'], y=Club11['DefensiveSkills'],hovertext=Club11['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="undefined.jpg",
xref="x",
yref="y",
x=52,
y=500,
sizex=40,
sizey=400,
sizing="stretch",
opacity=0.5,
layer="below"))
Plotting line graph of Manchester city club players overall and Goalkeeping skill scores to analyze how good the players are in those areas. From the plot we can say that Manchester City has 2 good players who has good Goalkeeping skills and overall scores for example Ederson has over all score as 86 and Goalkeeping skills score as 425.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club11['Overall'], y=Club11['GoalKeeperSkills'],hovertext=Club11['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="undefined.jpg",
xref="x",
yref="y",
x=52,
y=500,
sizex=40,
sizey=500,
sizing="stretch",
opacity=0.5,
layer="below"))
Club12 = Club1.loc[Club1['Club'] == 'Leicester City']
Club12
| Name | Overall | Club | Position | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|
| 149 | K. Schmeichel | 84 | Leicester City | GK | 161.0 | 737.0 | 409.0 |
| 259 | H. Maguire | 82 | Leicester City | LCB | 353.0 | 1030.0 | 62.0 |
| 341 | Ricardo Pereira | 81 | Leicester City | RB | 357.0 | 1207.0 | 65.0 |
| 425 | W. Ndidi | 80 | Leicester City | LDM | 361.0 | 1204.0 | 54.0 |
| 670 | Iborra | 79 | Leicester City | CDM | 390.0 | 1047.0 | 54.0 |
| 742 | B. Chilwell | 78 | Leicester City | LB | 324.0 | 1117.0 | 48.0 |
| 1666 | C. Fuchs | 75 | Leicester City | LB | 374.0 | 1113.0 | 70.0 |
| 1869 | D. Amartey | 75 | Leicester City | RB | 353.0 | 1122.0 | 62.0 |
| 1930 | W. Morgan | 75 | Leicester City | RCB | 262.0 | 836.0 | 45.0 |
| 1947 | C. Söyüncü | 75 | Leicester City | LCB | 286.0 | 1004.0 | 34.0 |
| 1984 | N. Mendy | 75 | Leicester City | RDM | 335.0 | 1116.0 | 62.0 |
| 2332 | D. Simpson | 74 | Leicester City | RB | 311.0 | 1050.0 | 58.0 |
| 2907 | D. Ward | 73 | Leicester City | GK | 121.0 | 604.0 | 353.0 |
| 3197 | Y. Benalouane | 73 | Leicester City | CB | 303.0 | 944.0 | 65.0 |
| 3528 | E. Jakupović | 72 | Leicester City | GK | 122.0 | 641.0 | 362.0 |
| 6537 | H. Choudhury | 69 | Leicester City | CDM | 298.0 | 1017.0 | 52.0 |
| 10391 | C. Elder | 65 | Leicester City | LB | 276.0 | 918.0 | 61.0 |
| 11584 | S. Hughes | 64 | Leicester City | CB | 217.0 | 858.0 | 56.0 |
| 13203 | D. Johnson | 62 | Leicester City | CB | 246.0 | 870.0 | 59.0 |
| 13602 | J. Knight | 62 | Leicester City | CB | 220.0 | 821.0 | 52.0 |
Plotting line graph of Leicester city club players overall and Defensive skill scores to analyze how good the players are in those areas. From the plot we can say that Manchester City has good players who has good Defensive skills and overall scores for example Maguire has over all score as 82 and Defensive skills score as 353.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club12['Overall'], y=Club12['DefensiveSkills'],hovertext=Club12['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="Leicester.jpg",
xref="x",
yref="y",
x=60,
y=500,
sizex=50,
sizey=500,
# sizing="stretch",
opacity=0.5,
layer="below"))
Plotting line graph of Leicester city club players overall and GoalKeeping skill scores to analyze how good the players are in those areas. From the plot we can say that Leicester City has good players who has good GoalKeeping skills and overall scores for example Schmeichel has over all score as 84 and GoalKeeping skills score as 409.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club12['Overall'], y=Club12['GoalKeeperSkills'],hovertext=Club12['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="Leicester.jpg",
xref="x",
yref="y",
x=62,
y=500,
sizex=40,
sizey=500,
sizing="stretch",
opacity=0.5,
layer="below"))
Club13 = Club1.loc[Club1['Club'] == 'Everton']
Club13
| Name | Overall | Club | Position | DefensiveSkills | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|
| 180 | J. Pickford | 83 | Everton | GK | 185.0 | 771.0 | 416.0 |
| 198 | I. Gueye | 83 | Everton | RDM | 332.0 | 1220.0 | 47.0 |
| 444 | André Gomes | 80 | Everton | LDM | 373.0 | 1152.0 | 49.0 |
| 451 | M. Keane | 80 | Everton | RCB | 330.0 | 1012.0 | 56.0 |
| 475 | L. Digne | 80 | Everton | LB | 345.0 | 1175.0 | 46.0 |
| 517 | S. Coleman | 80 | Everton | RWB | 359.0 | 1177.0 | 50.0 |
| 617 | K. Zouma | 79 | Everton | CB | 310.0 | 1055.0 | 59.0 |
| 699 | M. Schneiderlin | 79 | Everton | CDM | 358.0 | 1108.0 | 50.0 |
| 767 | Y. Mina | 78 | Everton | LCB | 305.0 | 1010.0 | 51.0 |
| 945 | L. Baines | 77 | Everton | LB | 369.0 | 1168.0 | 58.0 |
| 1009 | J. McCarthy | 77 | Everton | CDM | 356.0 | 1089.0 | 54.0 |
| 1078 | P. Jagielka | 77 | Everton | CB | 317.0 | 974.0 | 169.0 |
| 1198 | M. Holgate | 76 | Everton | CB | 324.0 | 1000.0 | 58.0 |
| 2834 | M. Stekelenburg | 73 | Everton | GK | 132.0 | 582.0 | 365.0 |
| 3235 | J. Kenny | 72 | Everton | RB | 282.0 | 1020.0 | 63.0 |
| 6750 | T. Browning | 68 | Everton | CB | 271.0 | 884.0 | 51.0 |
| 8463 | B. Galloway | 67 | Everton | LB | 285.0 | 951.0 | 45.0 |
| 8694 | B. Baningime | 67 | Everton | CDM | 293.0 | 959.0 | 59.0 |
| 15577 | João Virgínia | 59 | Everton | GK | 66.0 | 475.0 | 298.0 |
Plotting line graph of Everton club players overall and Defensive skill scores to analyze how good the players are in those areas. From the plot we can say that Everton has good players who has good Defensive skills and overall scores for example GUEYE has over all score as 83 and Defensive skills score as 332.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club13['Overall'], y=Club13['DefensiveSkills'],hovertext=Club13['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="teahub.png",
xref="x",
yref="y",
x=50,
y=500,
sizex=40,
sizey=500,
sizing="stretch",
opacity=0.5,
layer="below"))
Plotting line graph of Everton club players overall and GoalKeeping skill scores to analyze how good the players are in those areas. From the plot we can say that Everton has good players who has good GoalKeeping skills and overall scores for example Pickford has over all score as 88 and GoalKeeping skills score as 416.
fig = go.Figure()
# Add trace
fig.add_trace(
go.Scatter(x=Club13['Overall'], y=Club13['GoalKeeperSkills'],hovertext=Club13['Name'],line_color='red')
)
# Add images
fig.add_layout_image(
dict(
source="teahub.png",
xref="x",
yref="y",
x=56,
y=500,
sizex=40,
sizey=500,
sizing="stretch",
opacity=0.5,
layer="below"))
Conclusion: To conclude our usecase 1 to find the club which is most likely to conceed less goals, First we chose the skills which are responsible for the defense and those players are likely to stop opponents scoring goal. Then we filtered the players who plays at the defensive positions like CDM,GK etc. After filtering out the players, sorted the top 10 clubs having highest defensive skills, fitness scores and goalkeeping skills. We came to say that the football club Leicester city might have conceeded less goals compared to other clubs.
Footbal
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | 94 | FC Barcelona | €110.5M | Left | 4.0 | RF | 5'7 | ... | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6'2 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | 190871 | Neymar Jr | 26 | 92 | Paris Saint-Germain | €118.5M | Right | 5.0 | LW | 5'9 | ... | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 3 | 193080 | De Gea | 27 | 91 | Manchester United | €72M | Right | 3.0 | GK | 6'4 | ... | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 |
| 4 | 192985 | K. De Bruyne | 27 | 91 | Manchester City | €102M | Right | 5.0 | RCM | 5'11 | ... | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18202 | 238813 | J. Lundstram | 19 | 47 | Crewe Alexandra | €60K | Right | 2.0 | CM | 5'9 | ... | 43.0 | 45.0 | 40.0 | 48.0 | 47.0 | 10.0 | 13.0 | 7.0 | 8.0 | 9.0 |
| 18203 | 243165 | N. Christoffersson | 19 | 47 | Trelleborgs FF | €60K | Right | 2.0 | ST | 6'3 | ... | 43.0 | 42.0 | 22.0 | 15.0 | 19.0 | 10.0 | 9.0 | 9.0 | 5.0 | 12.0 |
| 18204 | 241638 | B. Worman | 16 | 47 | Cambridge United | €60K | Right | 3.0 | ST | 5'8 | ... | 55.0 | 41.0 | 32.0 | 13.0 | 11.0 | 6.0 | 5.0 | 10.0 | 6.0 | 13.0 |
| 18205 | 246268 | D. Walker-Rice | 17 | 47 | Tranmere Rovers | €60K | Right | 3.0 | RW | 5'10 | ... | 50.0 | 46.0 | 20.0 | 25.0 | 27.0 | 14.0 | 6.0 | 14.0 | 8.0 | 9.0 |
| 18206 | 246269 | G. Nugent | 16 | 46 | Tranmere Rovers | €60K | Right | 3.0 | CM | 5'10 | ... | 33.0 | 43.0 | 40.0 | 43.0 | 50.0 | 10.0 | 15.0 | 9.0 | 12.0 | 9.0 |
17918 rows × 45 columns
Our approach is to classify the difference between the positions like cdm,gk and st. In order to do that, get the player general playing skill scores, fitness scores, defensive scores and the goal keeping scores into a features list. train the positions CDM,ST and GK with those features and apply k-means classifier to classify the difference.
Positions = Footbal[['Name','Position','Crossing','Finishing','HeadingAccuracy','ShortPassing','Volleys','Dribbling','Curve','FKAccuracy','LongPassing','BallControl','Marking','StandingTackle','SlidingTackle']]
PhysicalnMental = Footbal['Acceleration'] + Footbal['SprintSpeed'] + Footbal['Agility'] + Footbal['Reactions'] + Footbal['Balance'] + Footbal['ShotPower'] + Footbal['Jumping'] + Footbal['Stamina'] + Footbal['Strength'] + Footbal['LongShots'] + Footbal['Aggression'] + Footbal['Interceptions'] + Footbal['Positioning'] + Footbal['Vision'] + Footbal['Penalties'] + Footbal['Composure']
Positions['FitnessScore'] = PhysicalnMental
GoalKeeper = Footbal['GKDiving'] + Footbal['GKHandling'] + Footbal['GKKicking'] + Footbal['GKPositioning'] + Footbal['GKReflexes']
Positions['GoalKeeperSkills'] = GoalKeeper
# Sorting the positions.
Position = Positions[Positions['Position'].isin(['GK','ST','CDM'])]
# Applying label encoding of positions to pass into the classifier.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Position.Position = le.fit_transform(Position.Position)
x = Position.iloc[:,2:17]
y = Position.iloc[:,1:2]
x
| Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Marking | StandingTackle | SlidingTackle | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 84.0 | 94.0 | 89.0 | 81.0 | 87.0 | 88.0 | 81.0 | 76.0 | 77.0 | 94.0 | 28.0 | 31.0 | 23.0 | 1332.0 | 58.0 |
| 3 | 17.0 | 13.0 | 21.0 | 50.0 | 13.0 | 18.0 | 21.0 | 19.0 | 51.0 | 42.0 | 15.0 | 21.0 | 13.0 | 781.0 | 444.0 |
| 9 | 13.0 | 11.0 | 15.0 | 29.0 | 13.0 | 12.0 | 13.0 | 14.0 | 26.0 | 16.0 | 27.0 | 12.0 | 18.0 | 749.0 | 433.0 |
| 10 | 62.0 | 91.0 | 85.0 | 83.0 | 89.0 | 85.0 | 77.0 | 86.0 | 65.0 | 89.0 | 34.0 | 42.0 | 19.0 | 1280.0 | 51.0 |
| 16 | 75.0 | 94.0 | 85.0 | 80.0 | 84.0 | 80.0 | 78.0 | 68.0 | 82.0 | 84.0 | 56.0 | 36.0 | 38.0 | 1260.0 | 54.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18194 | 10.0 | 6.0 | 10.0 | 25.0 | 6.0 | 12.0 | 13.0 | 10.0 | 20.0 | 17.0 | 6.0 | 10.0 | 11.0 | 358.0 | 240.0 |
| 18198 | 14.0 | 8.0 | 14.0 | 19.0 | 8.0 | 10.0 | 13.0 | 10.0 | 21.0 | 11.0 | 15.0 | 11.0 | 13.0 | 413.0 | 246.0 |
| 18200 | 28.0 | 47.0 | 47.0 | 42.0 | 37.0 | 39.0 | 32.0 | 25.0 | 30.0 | 41.0 | 15.0 | 17.0 | 14.0 | 778.0 | 61.0 |
| 18203 | 23.0 | 52.0 | 52.0 | 43.0 | 36.0 | 39.0 | 32.0 | 20.0 | 25.0 | 40.0 | 22.0 | 15.0 | 19.0 | 677.0 | 45.0 |
| 18204 | 25.0 | 40.0 | 46.0 | 38.0 | 38.0 | 45.0 | 38.0 | 27.0 | 28.0 | 44.0 | 32.0 | 13.0 | 11.0 | 765.0 | 40.0 |
5058 rows × 15 columns
#Splitting up the dataset for training and testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3315508,random_state=1)
#Normalizing the values to scalar.
sc_x = StandardScaler()
x_train1 = sc_x.fit_transform(x_train)
x_test1 = sc_x.transform(x_test)
x_train1
array([[-0.92125946, -1.09498931, -1.14796569, ..., -0.52062783,
-1.13143059, 0.85705278],
[-0.03636449, 0.86270622, 0.45237271, ..., -0.31022349,
0.45971324, -0.77945264],
[-0.81715417, -0.89921976, -1.18900001, ..., -0.99403759,
-1.11813413, 1.27569371],
...,
[ 1.212899 , 1.1759375 , 1.3551277 , ..., 0.58399494,
1.23977261, -0.8022876 ],
[-1.0774174 , -1.13414322, -1.18900001, ..., -0.88883542,
-1.33087759, 1.02450915],
[-1.02536476, -1.0558354 , -1.18900001, ..., -0.62583 ,
-1.01176239, 1.70194628]])
#Creating a classifier with standard n-neighbors as 5 and p value as 2.
classifier= neighbors.KNeighborsClassifier(n_neighbors=5,p=2)
classifier.fit(x_train1,y_train)
print(classifier)
KNeighborsClassifier()
#Predicting the trained classifer to get the classified results to predict the positions according to the features passed.
y_pred = classifier.predict(x_test1)
first_pred = metrics.accuracy_score(y_test, y_pred)*100
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100)
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
Accuracy: 99.76147883124628
[[306 0 0]
[ 0 670 0]
[ 4 0 697]]
precision recall f1-score support
0 0.99 1.00 0.99 306
1 1.00 1.00 1.00 670
2 1.00 0.99 1.00 701
accuracy 1.00 1677
macro avg 1.00 1.00 1.00 1677
weighted avg 1.00 1.00 1.00 1677
Visualizing the confusion matrix , we can say that there is an 100% accuracy and f1 score that conveys that the classifier has predicted the positions correct according to the features passed for defense, striker, goal keeper.
from sklearn.naive_bayes import GaussianNB
from yellowbrick.classifier import ClassificationReport
# Instantiate the classification model and visualizer
classifier1= neighbors.KNeighborsClassifier(n_neighbors=5,p=2)
classes=['Central Defensive Midfielder','Goal Keeper','Striker']
visualizer = ClassificationReport(classifier1, classes=classes, support=True)
visualizer.fit(x_train1, y_train) # Fit the visualizer and the model
visualizer.score(x_test1, y_test) # Evaluate the model on the test data
visualizer.show()
<AxesSubplot:title={'center':'KNeighborsClassifier Classification Report'}>
y_test
| Position | |
|---|---|
| 15332 | 1 |
| 3248 | 2 |
| 9297 | 1 |
| 13922 | 1 |
| 14508 | 2 |
| ... | ... |
| 3876 | 0 |
| 12948 | 0 |
| 11941 | 2 |
| 16571 | 1 |
| 5148 | 1 |
1677 rows × 1 columns
x_test
| Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Marking | StandingTackle | SlidingTackle | FitnessScore | GoalKeeperSkills | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15332 | 16.0 | 11.0 | 15.0 | 27.0 | 16.0 | 15.0 | 15.0 | 15.0 | 22.0 | 21.0 | 8.0 | 18.0 | 14.0 | 504.0 | 293.0 |
| 3248 | 67.0 | 70.0 | 71.0 | 68.0 | 61.0 | 75.0 | 62.0 | 66.0 | 68.0 | 75.0 | 37.0 | 39.0 | 30.0 | 1084.0 | 61.0 |
| 9297 | 10.0 | 7.0 | 22.0 | 19.0 | 12.0 | 25.0 | 24.0 | 14.0 | 28.0 | 20.0 | 9.0 | 10.0 | 10.0 | 588.0 | 330.0 |
| 13922 | 13.0 | 18.0 | 16.0 | 24.0 | 17.0 | 18.0 | 18.0 | 16.0 | 21.0 | 26.0 | 21.0 | 20.0 | 19.0 | 456.0 | 304.0 |
| 14508 | 54.0 | 62.0 | 63.0 | 57.0 | 61.0 | 54.0 | 42.0 | 37.0 | 53.0 | 57.0 | 31.0 | 24.0 | 23.0 | 903.0 | 57.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3876 | 69.0 | 66.0 | 43.0 | 72.0 | 57.0 | 70.0 | 82.0 | 80.0 | 71.0 | 72.0 | 58.0 | 73.0 | 72.0 | 1132.0 | 46.0 |
| 12948 | 51.0 | 49.0 | 53.0 | 67.0 | 46.0 | 66.0 | 68.0 | 64.0 | 65.0 | 67.0 | 61.0 | 58.0 | 56.0 | 1053.0 | 57.0 |
| 11941 | 38.0 | 61.0 | 64.0 | 50.0 | 59.0 | 59.0 | 61.0 | 70.0 | 39.0 | 62.0 | 38.0 | 23.0 | 22.0 | 955.0 | 51.0 |
| 16571 | 10.0 | 5.0 | 10.0 | 29.0 | 7.0 | 5.0 | 10.0 | 8.0 | 24.0 | 10.0 | 8.0 | 9.0 | 11.0 | 471.0 | 286.0 |
| 5148 | 18.0 | 17.0 | 12.0 | 13.0 | 15.0 | 16.0 | 12.0 | 19.0 | 13.0 | 18.0 | 15.0 | 11.0 | 12.0 | 710.0 | 346.0 |
1677 rows × 15 columns
#Adding the predicted and actual scores to the dataframe.
x_test['Predicted_position'] = pd.Series(y_pred, index=x_test.index)
x_test['Actuaal Position'] = y_test
x_test.head()
| Crossing | Finishing | HeadingAccuracy | ShortPassing | Volleys | Dribbling | Curve | FKAccuracy | LongPassing | BallControl | Marking | StandingTackle | SlidingTackle | FitnessScore | GoalKeeperSkills | Predicted_position | Actuaal Position | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15332 | 16.0 | 11.0 | 15.0 | 27.0 | 16.0 | 15.0 | 15.0 | 15.0 | 22.0 | 21.0 | 8.0 | 18.0 | 14.0 | 504.0 | 293.0 | 1 | 1 |
| 3248 | 67.0 | 70.0 | 71.0 | 68.0 | 61.0 | 75.0 | 62.0 | 66.0 | 68.0 | 75.0 | 37.0 | 39.0 | 30.0 | 1084.0 | 61.0 | 2 | 2 |
| 9297 | 10.0 | 7.0 | 22.0 | 19.0 | 12.0 | 25.0 | 24.0 | 14.0 | 28.0 | 20.0 | 9.0 | 10.0 | 10.0 | 588.0 | 330.0 | 1 | 1 |
| 13922 | 13.0 | 18.0 | 16.0 | 24.0 | 17.0 | 18.0 | 18.0 | 16.0 | 21.0 | 26.0 | 21.0 | 20.0 | 19.0 | 456.0 | 304.0 | 1 | 1 |
| 14508 | 54.0 | 62.0 | 63.0 | 57.0 | 61.0 | 54.0 | 42.0 | 37.0 | 53.0 | 57.0 | 31.0 | 24.0 | 23.0 | 903.0 | 57.0 | 2 | 2 |
Scatter 3D plot of players according to finishing, fitness score and goal keepers skills. Because, to differentiate the goak keeper from other positions like ST and CDM we need the goalkeeping skill scores as only the goal keeper have more points that ST and CDM. similarly, goalkeepers have less fitness score as they dont do any physical skillset like penalty shoots, sprint etc. To Differentiate the STriker position we have taken the finishing skill as strikers play forward towards opponents goal post scoring goal.
import plotly.express as px
fig = px.scatter_3d(x_test, x='GoalKeeperSkills', y='FitnessScore', z='Finishing',
color='Predicted_position')
fig.show()
Scatter 3D plot of players according to Marking, Standing Tackle score and Sliding Tackle skills. Because, to differentiate the Central Defensive Midfielder from other positions like ST and GK we need the Defensive skill scores as only the CDM have more points that ST and GK.
fig = px.scatter_3d(x_test, x='Marking', y='StandingTackle', z='SlidingTackle',
color='Predicted_position')
fig.show()
# player differences in different position
def plot_radar(name_list, features, df):
N = len(features)
# Assign series colors, title here
series_color = ['r', 'b', 'g', 'y', 'o']
text = 'Comparing Player Skills'
plt.clf()
fig=plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, polar=True)
for idx, name in enumerate(name_list,0):
#Set the angle of polar axis and close the plot
angles=np.linspace(0, 2*np.pi, N, endpoint=False)
angles=np.concatenate((angles,[angles[0]]))
#Define the stats for each series and close the plot in radar chart.
stats = df[labels].loc[df['Name']==name]
stats = stats.values
stats = np.concatenate((stats[0],[stats[0][0]]))
#Plot series 1,2,3,4....:
ax.plot(angles, stats, 'o-',
linewidth=2, color = series_color[idx],
label=name) #label here is to populate the legend key
ax.fill(angles, stats, alpha=0.3,
color = series_color[idx])
idx+=1
### Plot formatting
plt.xticks(angles[:-1], labels,horizontalalignment='center',
verticalalignment='bottom')
ax.set_rlabel_position(0)
plt.yticks( [20, 40, 60, 80],
['20', '40', '60', '80'], color= 'k', size=12, weight = 'bold')
plt.ylim(0,100)
# ax.set_thetagrids(angles * 180/np.pi, features)
# ax.set_title(text, weight='bold', fontsize = 12,
# horizontalalignment='center',
# verticalalignment='top')
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
return
labels = ['GKReflexes','GKHandling', 'Aggression','ShortPassing', 'SprintSpeed','Finishing']
plot_radar(['De Gea','Casemiro', 'Cristiano Ronaldo'], labels, football)
<Figure size 576x396 with 0 Axes>
labels = ['GKKicking','GKDiving','Crossing', 'SlidingTackle','Marking','BallControl']
plot_radar(['De Gea','Casemiro', 'Cristiano Ronaldo'], labels, Footbal)
<Figure size 576x396 with 0 Axes>
From both these plots we can see that De Gea (GK) has good goal keeping skills GKDiving,GKkicking,GKReflexes and GKHandling but very few skills like Marking,crossing,sliding tackle,finishing which belongs to player with position forward(ST) and CDM.Even for both the players Casemiro(CDM) and Cristiano Ronaldo(ST) we can see them having good skills with respect to their position.
Conclusion- To find out the differences between the players of the position ST,CDM and GK we decided to use the features or characteristics of a player playing in different position for that We used KNeighborsClassifier where we got 100% accuracy and f1 score which conveyed that the classifier has predicted the positions correct according to the features passed for defense, striker and goal keeper.So,for the further analysis we decided to plot a radar chart for three different players in different positions from which we can see that De Gea (GK) has good goal keeping skills GKDiving,GKkicking,GKReflexes and GKHandling but very few skills like Marking,crossing,sliding tackle,finishing which belongs to player with position forward(ST) and CDM.Even for both the players Casemiro(CDM) and Cristiano Ronaldo(ST) we can see them having good skills with respect to their position
with open('football.txt', 'w', encoding='utf-8', errors='ignore') as file:
pd.read_excel('2021-06-23_Dataset_Fifa_19.xlsx').to_string(file, index=False)
players=pd.read_csv('football.txt')
players
players = players[['ID','Name','Age','Overall','Club','Value','Preferred Foot','Weak Foot','Position','Height','Weight','Crossing','Finishing','HeadingAccuracy','ShortPassing','Volleys','Dribbling','Curve','FKAccuracy','LongPassing',
'BallControl','Acceleration','SprintSpeed','Agility','Reactions','Balance','ShotPower','Jumping','Stamina','Strength','LongShots',
'Aggression','Interceptions','Positioning','Vision','Penalties','Composure','Marking','StandingTackle','SlidingTackle','GKDiving','GKHandling','GKKicking','GKPositioning','GKReflexes']]
players = players[players.isnull().sum(axis=1) < 20]
players['Weight']
players['Weight']= players['Weight'].astype(str).str[:3]
players['Weight'] = players['Weight'].astype(float)
players['Weight']
players =players.fillna(0.0)
def parse_ht(ht):
ht_ = ht.split("'")
return float(ht_[0] +'.'+ ht_[1])
players['Height']= players['Height'].apply(lambda x:parse_ht(x))
players
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | 94 | FC Barcelona | €110.5M | Left | 4.0 | RF | 5.70 | ... | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6.20 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | 190871 | Neymar Jr | 26 | 92 | Paris Saint-Germain | €118.5M | Right | 5.0 | LW | 5.90 | ... | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 3 | 193080 | De Gea | 27 | 91 | Manchester United | €72M | Right | 3.0 | GK | 6.40 | ... | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 |
| 4 | 192985 | K. De Bruyne | 27 | 91 | Manchester City | €102M | Right | 5.0 | RCM | 5.11 | ... | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18202 | 238813 | J. Lundstram | 19 | 47 | Crewe Alexandra | €60K | Right | 2.0 | CM | 5.90 | ... | 43.0 | 45.0 | 40.0 | 48.0 | 47.0 | 10.0 | 13.0 | 7.0 | 8.0 | 9.0 |
| 18203 | 243165 | N. Christoffersson | 19 | 47 | Trelleborgs FF | €60K | Right | 2.0 | ST | 6.30 | ... | 43.0 | 42.0 | 22.0 | 15.0 | 19.0 | 10.0 | 9.0 | 9.0 | 5.0 | 12.0 |
| 18204 | 241638 | B. Worman | 16 | 47 | Cambridge United | €60K | Right | 3.0 | ST | 5.80 | ... | 55.0 | 41.0 | 32.0 | 13.0 | 11.0 | 6.0 | 5.0 | 10.0 | 6.0 | 13.0 |
| 18205 | 246268 | D. Walker-Rice | 17 | 47 | Tranmere Rovers | €60K | Right | 3.0 | RW | 5.10 | ... | 50.0 | 46.0 | 20.0 | 25.0 | 27.0 | 14.0 | 6.0 | 14.0 | 8.0 | 9.0 |
| 18206 | 246269 | G. Nugent | 16 | 46 | Tranmere Rovers | €60K | Right | 3.0 | CM | 5.10 | ... | 33.0 | 43.0 | 40.0 | 43.0 | 50.0 | 10.0 | 15.0 | 9.0 | 12.0 | 9.0 |
18159 rows × 45 columns
striker = players[players['Position'] =='ST']
nonstriker = players[players['Position'] !='ST']
nonstriker['Position']
0 RF
2 LW
3 GK
4 RCM
5 LF
...
18199 CM
18201 RB
18202 CM
18205 RW
18206 CM
Name: Position, Length: 16007, dtype: object
fig,a = plt.subplots(2,2, figsize= (15,10))
a[0][0].bar(striker['Weight'].mean(),striker['Height'].mean())
a[0][0].bar(nonstriker['Weight'].mean(),nonstriker['Height'].mean())
a[0][0].set_xlabel('Weight')
a[0][0].set_ylabel('Height')
a[0][0].legend(labels=['Striker', 'Non Striker'])
a[0][0].set_title("Striker vs Non Striker height and Weight")
a[0][1].bar(striker['Vision'].mean(),striker['Composure'].mean())
a[0][1].bar(nonstriker['Vision'].mean(),nonstriker['Composure'].mean())
a[0][1].set_xlabel('Vision')
a[0][1].set_ylabel('Composure')
a[0][1].legend(labels=['Striker', 'Non Striker'])
a[0][1].set_title("Striker vs Non Striker Vision and Composure")
a[1][0].bar(striker['Agility'].mean(),striker['Strength'].mean())
a[1][0].bar(nonstriker['Agility'].mean(),nonstriker['Strength'].mean())
a[1][0].set_xlabel('Agility')
a[1][0].set_ylabel('Strength')
a[1][0].legend(labels=['Striker', 'Non Striker'])
a[1][0].set_title("Striker vs Non Striker Agility and Strength")
a[1][1].bar(striker['StandingTackle'].mean(),striker['SlidingTackle'].mean())
a[1][1].bar(nonstriker['StandingTackle'].mean(),nonstriker['SlidingTackle'].mean())
a[1][1].set_xlabel('StandingTackle')
a[1][1].set_ylabel('SlidingTackle')
a[1][1].legend(labels=['Striker', 'Non Striker'])
a[1][1].set_title("Striker vs Non Striker StandingTackle and SlidingTackle")
plt.show()
gk= players[players['Position'] =='GK']
ls= players[players['Position'] =='LS']
cam= players[players['Position'] =='CAM']
rcm= players[players['Position'] =='RCM']
lwb= players[players['Position'] =='LWB']
lcb= players[players['Position'] =='LCB']
rb= players[players['Position'] =='RB']
fig = plt.figure(figsize= (15,5))
plt.bar(striker['Strength'].mean(),striker['Stamina'].mean())
plt.bar(nonstriker['Strength'].mean(),nonstriker['Stamina'].mean())
plt.bar(gk['Strength'].mean(),gk['Stamina'].mean())
plt.bar(cam['Strength'].mean(),cam['Stamina'].mean())
plt.bar(rcm['Strength'].mean(),rcm['Stamina'].mean())
plt.bar(lwb['Strength'].mean(),lwb['Stamina'].mean())
plt.bar(lcb['Strength'].mean(),lcb['Stamina'].mean())
plt.bar(rb['Strength'].mean(),rb['Stamina'].mean())
plt.xlabel('Strength')
plt.ylabel('Stamina')
plt.title("Striker vs Non Striker Strength and Stamina")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back'])
plt.show()
fig = plt.figure(figsize= (15,5))
plt.bar(striker['Acceleration'].mean(),striker['SprintSpeed'].mean())
plt.bar(nonstriker['Acceleration'].mean(),nonstriker['SprintSpeed'].mean())
plt.bar(gk['Acceleration'].mean(),gk['SprintSpeed'].mean())
plt.bar(cam['Acceleration'].mean(),cam['SprintSpeed'].mean())
plt.bar(rcm['Acceleration'].mean(),rcm['SprintSpeed'].mean())
plt.bar(lwb['Acceleration'].mean(),lwb['SprintSpeed'].mean())
plt.bar(lcb['Acceleration'].mean(),lcb['SprintSpeed'].mean())
plt.bar(rb['Acceleration'].mean(),rb['SprintSpeed'].mean())
plt.xlabel('Acceleration')
plt.ylabel('SprintSpeed')
plt.title("Striker vs Non Striker Acceleration and SprintSpeed")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back'])
plt.show()
fig = plt.figure(figsize= (15,5))
plt.bar(striker['Finishing'].mean(),striker['Dribbling'].mean())
plt.bar(nonstriker['Finishing'].mean(),nonstriker['Dribbling'].mean())
plt.bar(gk['Finishing'].mean(),gk['Dribbling'].mean())
plt.bar(cam['Finishing'].mean(),cam['Dribbling'].mean())
plt.bar(rcm['Finishing'].mean(),rcm['Dribbling'].mean())
plt.bar(lwb['Finishing'].mean(),lwb['Dribbling'].mean())
plt.bar(lcb['Finishing'].mean(),lcb['Dribbling'].mean())
plt.bar(rb['Finishing'].mean(),rb['Dribbling'].mean())
plt.bar(players['Finishing'][players['Name'] =='Cristiano Ronaldo'].mean(),players['Dribbling'][players['Name'] =='Cristiano Ronaldo'].mean())
plt.bar(players['Finishing'][players['Name'] =='R. Lewandowski'].mean(),players['Dribbling'][players['Name'] =='R. Lewandowski'].mean())
plt.bar(players['Finishing'][players['Name'] =='S. Agüero'].mean(),players['Dribbling'][players['Name'] =='S. Agüero'].mean())
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back', 'Ronaldo', 'R. Lewandowski' ,'S. Agüero'])
plt.xlabel('Finishing')
plt.ylabel('Dribbling')
plt.title("Striker vs Non Striker Finishing and Dribbling")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back', 'Ronaldo', 'R. Lewandowski' ,'S. Agüero'])
plt.show()
fig = plt.figure(figsize= (15,5))
plt.bar(striker['BallControl'].mean(),striker['FKAccuracy'].mean())
plt.bar(nonstriker['BallControl'].mean(),nonstriker['FKAccuracy'].mean())
plt.bar(gk['BallControl'].mean(),gk['FKAccuracy'].mean())
plt.bar(cam['BallControl'].mean(),cam['FKAccuracy'].mean())
plt.bar(rcm['BallControl'].mean(),rcm['FKAccuracy'].mean())
plt.bar(lwb['BallControl'].mean(),lwb['FKAccuracy'].mean())
plt.bar(lcb['BallControl'].mean(),lcb['FKAccuracy'].mean())
plt.bar(rb['BallControl'].mean(),rb['FKAccuracy'].mean())
plt.xlabel('BallControl')
plt.ylabel('Free Kick Accuracy')
plt.title("Striker vs Non Striker BallControl and Free Kick Accuracy")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back'])
plt.show()
fig = plt.figure(figsize= (15,6))
plt.bar(striker['LongPassing'].mean(),striker['ShortPassing'].mean())
plt.bar(nonstriker['LongPassing'].mean(),nonstriker['ShortPassing'].mean())
plt.bar(gk['LongPassing'].mean(),gk['ShortPassing'].mean())
plt.bar(cam['LongPassing'].mean(),cam['ShortPassing'].mean())
plt.bar(rcm['LongPassing'].mean(),rcm['ShortPassing'].mean())
plt.bar(lwb['LongPassing'].mean(),lwb['ShortPassing'].mean())
plt.bar(lcb['LongPassing'].mean(),lcb['ShortPassing'].mean())
plt.bar(rb['LongPassing'].mean(),rb['ShortPassing'].mean())
plt.title("Striker vs Non Striker Long Passing and Short Passing")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back'])
plt.xlabel('Long Passing')
plt.ylabel('Short Passing')
plt.show()
fig = plt.figure(figsize= (15,6))
plt.bar(striker['LongPassing'].mean(),striker['ShortPassing'].mean())
plt.bar(nonstriker['LongPassing'].mean(),nonstriker['ShortPassing'].mean())
plt.bar(gk['LongPassing'].mean(),gk['ShortPassing'].mean())
plt.bar(cam['LongPassing'].mean(),cam['ShortPassing'].mean())
plt.bar(rcm['LongPassing'].mean(),rcm['ShortPassing'].mean())
plt.bar(lwb['LongPassing'].mean(),lwb['ShortPassing'].mean())
plt.bar(lcb['LongPassing'].mean(),lcb['ShortPassing'].mean())
plt.bar(rb['LongPassing'].mean(),rb['ShortPassing'].mean())
plt.bar(players['LongPassing'][players['Name'] =='Cristiano Ronaldo'].mean(),players['ShortPassing'][players['Name'] =='Cristiano Ronaldo'].mean())
plt.bar(players['LongPassing'][players['Name'] =='R. Lewandowski'].mean(),players['ShortPassing'][players['Name'] =='R. Lewandowski'].mean())
plt.bar(players['LongPassing'][players['Name'] =='B. Worman'].mean(),players['ShortPassing'][players['Name'] =='B. Worman'].mean())
plt.title("Striker vs Non Striker Long Passing and Short Passing")
plt.legend(labels=['Striker', 'Non Striker', 'GoalKeeper', 'Center Attaking Midfilder', 'Right Center Midfielder', 'Left Wing Back',
'Left Center Back', 'Right Back', 'Ronaldo', 'R. Lewandowski' ,'B. Worman'])
plt.xlabel('Long Passing')
plt.ylabel('Short Passing')
plt.show()
striker
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6.2 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 10 | 188545 | R. Lewandowski | 29 | 90 | FC Bayern München | €77M | Right | 4.0 | ST | 6.0 | ... | 88.0 | 86.0 | 34.0 | 42.0 | 19.0 | 15.0 | 6.0 | 12.0 | 8.0 | 10.0 |
| 16 | 202126 | H. Kane | 24 | 89 | Tottenham Hotspur | €83.5M | Right | 4.0 | ST | 6.2 | ... | 90.0 | 89.0 | 56.0 | 36.0 | 38.0 | 8.0 | 10.0 | 11.0 | 14.0 | 11.0 |
| 23 | 153079 | S. Agüero | 30 | 89 | Manchester City | €64.5M | Right | 4.0 | ST | 5.8 | ... | 83.0 | 90.0 | 30.0 | 20.0 | 12.0 | 13.0 | 15.0 | 6.0 | 11.0 | 14.0 |
| 36 | 173731 | G. Bale | 28 | 88 | Real Madrid | €60M | Left | 3.0 | ST | 6.1 | ... | 76.0 | 86.0 | 54.0 | 55.0 | 52.0 | 15.0 | 15.0 | 11.0 | 5.0 | 6.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18177 | 238550 | R. Roache | 18 | 48 | Blackpool | €70K | Right | 3.0 | ST | 5.1 | ... | 55.0 | 49.0 | 18.0 | 16.0 | 11.0 | 6.0 | 9.0 | 11.0 | 7.0 | 12.0 |
| 18189 | 240160 | A. Kaltner | 18 | 47 | SpVgg Unterhaching | €60K | Right | 3.0 | ST | 5.1 | ... | 48.0 | 37.0 | 28.0 | 15.0 | 22.0 | 15.0 | 5.0 | 14.0 | 12.0 | 8.0 |
| 18200 | 231381 | J. Young | 18 | 47 | Swindon Town | €60K | Left | 2.0 | ST | 5.9 | ... | 58.0 | 50.0 | 15.0 | 17.0 | 14.0 | 11.0 | 15.0 | 12.0 | 12.0 | 11.0 |
| 18203 | 243165 | N. Christoffersson | 19 | 47 | Trelleborgs FF | €60K | Right | 2.0 | ST | 6.3 | ... | 43.0 | 42.0 | 22.0 | 15.0 | 19.0 | 10.0 | 9.0 | 9.0 | 5.0 | 12.0 |
| 18204 | 241638 | B. Worman | 16 | 47 | Cambridge United | €60K | Right | 3.0 | ST | 5.8 | ... | 55.0 | 41.0 | 32.0 | 13.0 | 11.0 | 6.0 | 5.0 | 10.0 | 6.0 | 13.0 |
2152 rows × 45 columns
vector1 = np.array(striker['LongPassing'].mean())
vector2 = np.array(players['LongPassing'][players['Name'] =='R. Roache'])
vector3 = np.array(striker['LongPassing'].mean())
vector4 = np.array(players['LongPassing'][players['Name'] =='R. Lewandowski'])
vector5 = np.array(striker['LongPassing'].mean())
vector6 = np.array(players['LongPassing'][players['Name'] =='B. Worman'])
op1=np.sqrt(np.sum(np.square(vector1-vector2)))
op2=np.sqrt(np.sum(np.square(vector3-vector4)))
op3=np.sqrt(np.sum(np.square(vector5-vector6)))
print(op1)
print(op2)
print(op3)
vector4
12.576672862453535 20.423327137546465 16.576672862453535
array([65.])
pla=players
pla
| ID | Name | Age | Overall | Club | Value | Preferred Foot | Weak Foot | Position | Height | ... | Penalties | Composure | Marking | StandingTackle | SlidingTackle | GKDiving | GKHandling | GKKicking | GKPositioning | GKReflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 158023 | L. Messi | 31 | 94 | FC Barcelona | €110.5M | Left | 4.0 | RF | 5.70 | ... | 75.0 | 96.0 | 33.0 | 28.0 | 26.0 | 6.0 | 11.0 | 15.0 | 14.0 | 8.0 |
| 1 | 20801 | Cristiano Ronaldo | 33 | 94 | Juventus | €77M | Right | 4.0 | ST | 6.20 | ... | 85.0 | 95.0 | 28.0 | 31.0 | 23.0 | 7.0 | 11.0 | 15.0 | 14.0 | 11.0 |
| 2 | 190871 | Neymar Jr | 26 | 92 | Paris Saint-Germain | €118.5M | Right | 5.0 | LW | 5.90 | ... | 81.0 | 94.0 | 27.0 | 24.0 | 33.0 | 9.0 | 9.0 | 15.0 | 15.0 | 11.0 |
| 3 | 193080 | De Gea | 27 | 91 | Manchester United | €72M | Right | 3.0 | GK | 6.40 | ... | 40.0 | 68.0 | 15.0 | 21.0 | 13.0 | 90.0 | 85.0 | 87.0 | 88.0 | 94.0 |
| 4 | 192985 | K. De Bruyne | 27 | 91 | Manchester City | €102M | Right | 5.0 | RCM | 5.11 | ... | 79.0 | 88.0 | 68.0 | 58.0 | 51.0 | 15.0 | 13.0 | 5.0 | 10.0 | 13.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18202 | 238813 | J. Lundstram | 19 | 47 | Crewe Alexandra | €60K | Right | 2.0 | CM | 5.90 | ... | 43.0 | 45.0 | 40.0 | 48.0 | 47.0 | 10.0 | 13.0 | 7.0 | 8.0 | 9.0 |
| 18203 | 243165 | N. Christoffersson | 19 | 47 | Trelleborgs FF | €60K | Right | 2.0 | ST | 6.30 | ... | 43.0 | 42.0 | 22.0 | 15.0 | 19.0 | 10.0 | 9.0 | 9.0 | 5.0 | 12.0 |
| 18204 | 241638 | B. Worman | 16 | 47 | Cambridge United | €60K | Right | 3.0 | ST | 5.80 | ... | 55.0 | 41.0 | 32.0 | 13.0 | 11.0 | 6.0 | 5.0 | 10.0 | 6.0 | 13.0 |
| 18205 | 246268 | D. Walker-Rice | 17 | 47 | Tranmere Rovers | €60K | Right | 3.0 | RW | 5.10 | ... | 50.0 | 46.0 | 20.0 | 25.0 | 27.0 | 14.0 | 6.0 | 14.0 | 8.0 | 9.0 |
| 18206 | 246269 | G. Nugent | 16 | 46 | Tranmere Rovers | €60K | Right | 3.0 | CM | 5.10 | ... | 33.0 | 43.0 | 40.0 | 43.0 | 50.0 | 10.0 | 15.0 | 9.0 | 12.0 | 9.0 |
18159 rows × 45 columns
df = pd.DataFrame(columns=['Name','LongPassing' ])
for i in pla['Name']:
vector1 = np.array(striker['LongPassing'].mean())
vector2 = np.array(pla['LongPassing'][pla['Name'] == i])
man=np.sqrt(np.sum(np.square(vector1-vector2)))
df = df.append({'Name': i,'LongPassing': man}, ignore_index=True)
df1 = pd.DataFrame(columns=['Name','BallControl' ])
for i in pla['Name']:
vector1 = np.array(striker['BallControl'].mean())
vector2 = np.array(pla['BallControl'][pla['Name'] == i])
man=np.sqrt(np.sum(np.square(vector1-vector2)))
df1 = df1.append({'Name': i,'BallControl': man}, ignore_index=True)
df2 = pd.DataFrame(columns=['Name','Finishing' ])
for i in pla['Name']:
vector1 = np.array(striker['Finishing'].mean())
vector2 = np.array(pla['Finishing'][pla['Name'] == i])
man=np.sqrt(np.sum(np.square(vector1-vector2)))
df2 = df2.append({'Name': i,'Finishing': man}, ignore_index=True)
df3 = pd.DataFrame(columns=['Name','Dribbling' ])
for i in pla['Name']:
vector1 = np.array(striker['Dribbling'].mean())
vector2 = np.array(pla['Dribbling'][pla['Name'] == i])
man=np.sqrt(np.sum(np.square(vector1-vector2)))
df3 = df3.append({'Name': i,'Dribbling': man}, ignore_index=True)
df4 = pd.DataFrame(columns=['Name','Weight' ])
for i in pla['Name']:
vector1 = np.array(striker['Weight'].mean())
vector2 = np.array(pla['Weight'][pla['Name'] == i])
man=np.sqrt(np.sum(np.square(vector1-vector2)))
df4 = df4.append({'Name': i,'Weight': man}, ignore_index=True)
df5 = pd.DataFrame(columns=['Name','Position' ])
for i in pla['Name']:
vector2 = np.array(pla['Position'][pla['Name'] == i])
df5 = df5.append({'Name': i,'Position': vector2}, ignore_index=True)
df5['Position']= df5['Position'].astype(str).str[2:-2]
dfs = [df5,df, df1, df2, df3,df4]
dfs = [df.set_index('Name') for df in dfs]
#dfs[0].join(dfs[1:])
dfs=pd.DataFrame().join(dfs, how="outer")
print(dfs.reindex())
Position LongPassing BallControl Finishing Dribbling \
Name
A. Abang ST 16.576673 4.638941 5.421004 6.695632
A. Abdellaoui LB 0.423327 16.638941 41.421004 23.695632
A. Abdennour CB 17.423327 12.638941 18.421004 14.695632
A. Abdi CM 27.423327 10.361059 5.421004 7.304368
A. Abdu Jaber ST 4.576673 4.638941 0.421004 1.695632
... ... ... ... ... ...
Óscar Gil RB 10.576673 13.638941 36.421004 5.695632
Óscar Pinchi LM 18.423327 6.361059 0.421004 6.304368
Óscar Plano LM 20.423327 9.361059 7.578996 9.304368
Óscar Valentín CDM 18.423327 4.361059 38.421004 4.695632
Óscar Whalley GK 14.576673 45.638941 49.421004 46.695632
Weight
Name
A. Abang 7.916822
A. Abdellaoui 3.916822
A. Abdennour 16.083178
A. Abdi 3.916822
A. Abdu Jaber 14.916822
... ...
Óscar Gil 3.916822
Óscar Pinchi 31.916822
Óscar Plano 9.916822
Óscar Valentín 25.916822
Óscar Whalley 7.083178
[2908981 rows x 6 columns]
StrikersSimilarity= dfs[dfs['Position'] == 'ST']
StrikersSimilarity['Avg']= (StrikersSimilarity['LongPassing'] + StrikersSimilarity['BallControl'] + StrikersSimilarity['Finishing'] + StrikersSimilarity['Dribbling'] + StrikersSimilarity['Weight'])/5
StrikersSimilarity
| Position | LongPassing | BallControl | Finishing | Dribbling | Weight | Avg | |
|---|---|---|---|---|---|---|---|
| Name | |||||||
| A. Abang | ST | 16.576673 | 4.638941 | 5.421004 | 6.695632 | 7.916822 | 8.249814 |
| A. Abdu Jaber | ST | 4.576673 | 4.638941 | 0.421004 | 1.695632 | 14.916822 | 5.249814 |
| A. Adam | ST | 10.576673 | 3.638941 | 0.578996 | 0.695632 | 9.916822 | 5.081413 |
| A. Addai | ST | 12.576673 | 13.638941 | 9.421004 | 13.695632 | 5.916822 | 11.049814 |
| A. Agouda | ST | 11.576673 | 11.638941 | 9.421004 | 17.695632 | 14.916822 | 13.049814 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| Zhu Jianrong | ST | 8.423327 | 15.638941 | 0.421004 | 7.695632 | 12.083178 | 8.852416 |
| Zé Luís | ST | 0.576673 | 17.361059 | 7.578996 | 17.304368 | 25.083178 | 13.580855 |
| Zé Turbo | ST | 7.576673 | 0.361059 | 1.421004 | 0.304368 | 12.083178 | 4.349257 |
| Álex López | ST | 4.423327 | 6.361059 | 5.421004 | 1.304368 | 3.916822 | 4.285316 |
| Éder | ST | 18.423327 | 5.361059 | 7.578996 | 4.304368 | 10.083178 | 9.150186 |
1892 rows × 7 columns
StrikersSimiy=StrikersSimilarity.sort_values(by=['Avg'], inplace=True)
StrikersSimilarity
| Position | LongPassing | BallControl | Finishing | Dribbling | Weight | Avg | |
|---|---|---|---|---|---|---|---|
| Name | |||||||
| Miguel | ST | 1.576673 | 0.361059 | 1.421004 | 0.304368 | 1.083178 | 0.949257 |
| L. Rosseti | ST | 0.423327 | 0.638941 | 0.421004 | 0.304368 | 3.083178 | 0.974164 |
| N. Vergos | ST | 0.576673 | 0.361059 | 0.421004 | 4.695632 | 1.083178 | 1.427509 |
| P. Caballero | ST | 0.423327 | 0.361059 | 0.578996 | 0.695632 | 5.083178 | 1.428439 |
| Juan Delgado | ST | 0.576673 | 0.638941 | 1.578996 | 1.695632 | 3.083178 | 1.514684 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| S. Agüero | ST | 19.423327 | 25.361059 | 26.578996 | 26.304368 | 14.916822 | 22.516914 |
| G. Bale | ST | 35.423327 | 21.361059 | 19.578996 | 24.304368 | 12.083178 | 22.550186 |
| R. Lukaku | ST | 28.423327 | 8.361059 | 20.578996 | 17.304368 | 38.083178 | 22.550186 |
| T. Chorý | ST | 23.576673 | 19.638941 | 9.421004 | 14.695632 | 54.083178 | 24.283086 |
| Cristiano Ronaldo | ST | 32.423327 | 30.361059 | 27.578996 | 25.304368 | 14.083178 | 25.950186 |
1892 rows × 7 columns